{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 使用逻辑回归进行手写数字识别"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'tools'",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mModuleNotFoundError\u001B[0m                       Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/380848900.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      4\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0msklearn\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mlinear_model\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      5\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0msklearn\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmetrics\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0maccuracy_score\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 6\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mtools\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mRead_Minist_Tool\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[1;33m*\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      7\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mjoblib\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mModuleNotFoundError\u001B[0m: No module named 'tools'"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "from sklearn import linear_model\n",
    "from sklearn.metrics import accuracy_score\n",
    "from tools.Read_Minist_Tool import *\n",
    "# from sklearn.externals import joblib\n",
    "import joblib"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'load_train_images' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/1271029365.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mtrain_images\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mload_train_images\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      2\u001B[0m \u001B[0mtrain_labels\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mload_train_labels\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      3\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      4\u001B[0m \u001B[0mtest_images\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mload_test_images\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      5\u001B[0m \u001B[0mtest_labels\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mload_test_labels\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mNameError\u001B[0m: name 'load_train_images' is not defined"
     ]
    }
   ],
   "source": [
    "train_images = load_train_images()\n",
    "train_labels = load_train_labels()\n",
    "\n",
    "test_images = load_test_images()\n",
    "test_labels = load_test_labels()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_images' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/909147007.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mtrain_images\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mshape\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m: name 'train_images' is not defined"
     ]
    }
   ],
   "source": [
    "train_images.shape"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "##### 查看前十个数据及其标签以读取是否正确"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_labels' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/3808016203.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[1;32mfor\u001B[0m \u001B[0mi\u001B[0m \u001B[1;32min\u001B[0m \u001B[0mrange\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 2\u001B[1;33m     \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtrain_labels\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mi\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      3\u001B[0m     \u001B[0mplt\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mimshow\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtrain_images\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mi\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mcmap\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;34m'gray'\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      4\u001B[0m     \u001B[0mplt\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mshow\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mNameError\u001B[0m: name 'train_labels' is not defined"
     ]
    }
   ],
   "source": [
    "for i in range(10):\n",
    "    print(train_labels[i])\n",
    "    plt.imshow(train_images[i], cmap='gray')\n",
    "    plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "##### 构建训练数据特征\n",
    "把X转换成6000行，28*28列的矩阵。"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'train_images' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/3464704092.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mX\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mtrain_images\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mreshape\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtrain_images\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mshape\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m-\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m: name 'train_images' is not defined"
     ]
    }
   ],
   "source": [
    "X = train_images.reshape(train_images.shape[0],-1)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "数据归一化"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'X' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/1423481241.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mX\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mX\u001B[0m\u001B[1;33m/\u001B[0m\u001B[1;36m255\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m: name 'X' is not defined"
     ]
    }
   ],
   "source": [
    "X = X/255"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "为了程序尽快训练完，取一部分训练数据进行训练，实际操作时不需这样做"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "X = X[0:1000,]\n",
    "train_labels = train_labels[0:1000,]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "##### 代入模型"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'X' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/291249872.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[0;32m      1\u001B[0m \u001B[0mmodel\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mlinear_model\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mLogisticRegression\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mC\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m100.0\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 2\u001B[1;33m \u001B[0mmodel\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0mtrain_labels\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m: name 'X' is not defined"
     ]
    }
   ],
   "source": [
    "model = linear_model.LogisticRegression(C=100.0)\n",
    "model.fit(X,train_labels)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "'LogisticRegression' object has no attribute 'coef_'",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mAttributeError\u001B[0m                            Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/706179168.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mmodel\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mcoef_\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      2\u001B[0m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mmodel\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mintercept_\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mAttributeError\u001B[0m: 'LogisticRegression' object has no attribute 'coef_'"
     ]
    }
   ],
   "source": [
    "print(model.coef_)\n",
    "print(model.intercept_)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "##### 测试模型"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "测试集精度"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'test_images' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/3291847502.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mX_test\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mtest_images\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mreshape\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtest_images\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mshape\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;36m0\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m-\u001B[0m\u001B[1;36m1\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      2\u001B[0m \u001B[0mX_test\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mX_test\u001B[0m\u001B[1;33m/\u001B[0m\u001B[1;36m255\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      3\u001B[0m \u001B[0my_test_hat\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mmodel\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpredict\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX_test\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m      4\u001B[0m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;34m'测试集精确度='\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0maccuracy_score\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtest_labels\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0my_test_hat\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mNameError\u001B[0m: name 'test_images' is not defined"
     ]
    }
   ],
   "source": [
    "X_test = test_images.reshape(test_images.shape[0],-1)\n",
    "X_test = X_test/255\n",
    "y_test_hat = model.predict(X_test)\n",
    "print('测试集精确度=',accuracy_score(test_labels,y_test_hat))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "训练集精度"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'X' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[1;31mNameError\u001B[0m                                 Traceback (most recent call last)",
      "\u001B[1;32m~\\AppData\\Local\\Temp/ipykernel_266600/3609387167.py\u001B[0m in \u001B[0;36m<module>\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0my_train_hat\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mmodel\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpredict\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m      2\u001B[0m \u001B[0mprint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;34m'训练集精确度='\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0maccuracy_score\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mtrain_labels\u001B[0m\u001B[1;33m,\u001B[0m\u001B[0my_train_hat\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
      "\u001B[1;31mNameError\u001B[0m: name 'X' is not defined"
     ]
    }
   ],
   "source": [
    "y_train_hat = model.predict(X)\n",
    "print('训练集精确度=',accuracy_score(train_labels,y_train_hat))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}